********************************************************************************
* 	I.  Prepare/order raw PPI data
* 	II. Prepare/order raw CPI data
* 	III.Prepare data for analyses: 17 manufacturing sectors, 31 countries
* 	IV. Prepare data for analyses: 17 manufacturing sectors, 31 countries (in USD)
* 	V.  Prepare data for analyses: 17 manufacturing + 1 service sectors, 31 countries
* 			a. Service sector: aggregated data
* 			b. Service sector: CPI services
* 	VI.	35 sectors 31 countries
********************************************************************************

********************************************************************************
* Main settings
********************************************************************************
* Start from an empty data set and switch off output paging.
clear
set more off

* NOTE: $rootfolder has to be defined before this file is run; the assignment
* below is intentionally left commented out.
*global rootfolder ""

* Input locations (all derived from $rootfolder)
global inputfolder "$rootfolder\data"
global folder "$rootfolder\analysis"
global CSfolder "$folder\CS"
global wiodfolder "$folder\WIOD"
global fxfolder "$folder\FX"

* Output locations; mkdir is wrapped in capture so an existing folder is not an error.
global outputfolder "$folder\PPI"
capture mkdir "$outputfolder"

global matlabfolder "$outputfolder\MatlabFiles"
capture mkdir "$matlabfolder"

* Year window used for the imputation and for the per-month Matlab exports
global YEAR_START 1995
global YEAR_END 2011
********************************************************************************


***********************************
* I. Prepare/order PPI data
***********************************
* Load the interpolated two-digit NACE PPI panel.
use "$inputfolder\1_PPI\output\1_PPI_AllRegions_NACE_2D_data_added_interpol.dta", clear

* Split the incoming time variable into calendar month and calendar year.
* It is fed to dofm(), so it is presumably a Stata monthly date - TODO confirm.
rename month date
gen date_daily = dofm(date)
gen month = month(date_daily)
gen year = year(date_daily)
* Removes every variable whose name starts with "date" (both helpers above included).
drop date*

* Attach WIOD sector codes to the NACE Rev. 1.1 sectors.
joinby nace_r1_1 using "$CSfolder\tempconcordances.dta", unmatched(both)
* Keep matched rows and concordance-only rows; rows present only in the PPI
* data (no WIOD counterpart, "non-goods sectors" in the original note) go.
keep if inlist(_merge, 2, 3)
drop _merge

* Attach WIOD country names; the PPI country identifier becomes nace_name.
rename country nace_name
joinby nace_name using "$inputfolder\0_Concordances&OtherAuxiliaryFiles\Name_Concordance.dta", unmatched(both)
* Only countries found on both sides survive (drops ROW and EU aggregates
* according to the original note).
drop if _merge != 3
drop _merge

* ------------------------------------------------------------------------------
* Rebuild one PPI index per (wiod_name, wiod_code) from averaged log changes.
*   1. log change of ppi within every (nace_name, nace_r1_1) series
*   2. average the log changes over all NACE sectors mapped to one WIOD code
*   3. cumulate the averaged changes into an index (100 * exp(running sum))
*   4. back-fill the period just before the first available change
* ------------------------------------------------------------------------------
gen date = ym(year, month)
egen id = group(nace_name nace_r1_1)
xtset id date
gen dppi = ln(ppi) - ln(l1.ppi)

collapse (mean) dppi , by(year month wiod_code wiod_name nace_name name oecdbtdi_name)

* collapse keeps only the by-variables and dppi, so date has to be recreated
gen date = ym(year, month)

* sum() treats a missing change as zero; those periods are blanked again below
bys wiod_name wiod_code (date): gen sumldppi = sum(dppi)
gen const_ppi2 = exp(sumldppi) * 100
replace const_ppi2 = . if mi(dppi)

* Back-fill: when the index is missing in t but the change t -> t+1 is known,
* recover level(t) = level(t+1) / exp(change(t+1)).
* Done as a single by-group replace instead of an observation-by-observation
* loop: _n+1 is evaluated within the (wiod_name, wiod_code) series, so the last
* row of one series can never borrow the first row of the next series, and the
* data are passed over once rather than once per observation.
bysort wiod_name wiod_code (date): replace const_ppi2 = const_ppi2[_n+1] / exp(dppi[_n+1]) if mi(const_ppi2) & !mi(dppi[_n+1])

rename const_ppi2 ppi
drop sumldppi

* drops dppi (and anything else starting with "dp")
drop dp* 	
drop date

* Bring in the manually collected Datastream series (ppi_manual); the using
* file identifies countries by "country", hence the temporary rename.
rename nace_name country
merge 1:1 wiod_code country year month using "$inputfolder\1_PPI\output\1.6_PPI_Datastream_manual_wiod.dta", keepusing(ppi_manual) nogenerate

* Countries for which the source is Datastream: the manual series replaces ppi.
* (Two inlist() calls because inlist() accepts at most ten string arguments.)
replace ppi = ppi_manual if inlist(country, "AU", "CN", "BR", "ID", "IN") | inlist(country, "JP", "KR", "MX", "RU", "TW")

* Re-attach the country concordance; "update" fills concordance variables that
* are missing in memory (e.g. on rows that came only from the Datastream file).
rename country nace_name
joinby nace_name using "$inputfolder\0_Concordances&OtherAuxiliaryFiles\Name_Concordance.dta", unmatched(both) update

* Only the WIOD identifiers are needed from here on.
drop _merge ppi_manual
drop nace_name name oecdbtdi_name

order year month ppi

* Take each set {c,s}_t into account (for c=1...n, s=1...m, t=T_0...T):
* keep matched rows plus cells that exist only in the c/s set, so every
* cell of the set is present even when it has no price data.
joinby year month wiod_name wiod_code using "$CSfolder\tempcsset.dta", unmatched(both)
keep if inlist(_merge, 2, 3)
drop _merge

sort year month c s
order year month c s ppi

save "$outputfolder\PI_ordered_monthly.dta", replace
********************************************************************************

***********************************
* PPI data in USD
***********************************
* Convert the ordered PPI panel with the monthly FX series (ppi / FX_usd).
use "$outputfolder\PI_ordered_monthly.dta", clear

* The FX file is keyed on c_I / country, so rename around the merge.
rename c c_I
rename wiod_name country
* Keep every PPI row (matched or not); FX-only rows are discarded.
merge m:1 year month c_I country using "$fxfolder\FX_rates_USDlevel_monthly.dta", keep(master match) nogenerate
rename country wiod_name
rename c_I c

* Replace the index by its FX-adjusted counterpart under the same name.
gen ppi_usd = ppi / FX_usd
drop ppi FX_usd
rename ppi_usd ppi
label variable ppi "ppi in USD"

save "$outputfolder\PIusd_ordered_monthly.dta", replace
********************************************************************************

***********************************
* II. Prepare/order CPI services data
***********************************
* Load the CPI services series.
use "$inputfolder\4_CPI\output\CPIserv.dta", clear
* The series is parked under an arbitrary WIOD service sector code
* --> p: activities of households
gen wiod_code = "p"

* Calendar month and year from the date variable (passed through dofm(), so
* presumably a Stata monthly date - TODO confirm).
gen date_daily = dofm(date)
gen month = month(date_daily)
gen year = year(date_daily)
* Removes every variable whose name starts with "date".
drop date*

* WIOD country names; CPI rows without a concordance entry are discarded,
* concordance-only rows are kept at this point.
rename country nace_name
merge m:1 nace_name using "$inputfolder\0_Concordances&OtherAuxiliaryFiles\Name_Concordance.dta", keep(match using) nogenerate
drop name oecdbtdi_name nace_name
order year month wiod_name wiod_code cpi_serv

* Align with the full c/s set: rows that are not part of the set are dropped,
* cells of the set without CPI data are kept.
merge 1:1 year month wiod_name wiod_code using "$CSfolder\tempcsset.dta", keep(match using) nogenerate
order year month c s cpi_serv wiod_code wiod_name
sort c s year month

save "$outputfolder\CPI_ordered_monthly.dta", replace
********************************************************************************


********************************************************************************
* III. 17M sectors 31 countries
* Prepare PPI: collapse to final sector list & interpolate
********************************************************************************
use "$outputfolder\PI_ordered_monthly.dta", clear

* Declare the country-sector panel so that the lag operator can be used.
egen ID = group(c s)
gen date = ym(year,month)
format date %tg
xtset ID date, monthly

* Monthly log change of the price index
gen chng_ppi = ln(ppi) - ln(l1.ppi)

* --- Sectors: split the observed WIOD codes into service and other sectors ---
levelsof wiod_code, local(sctr_range) clean
local Srest_candidates "50 51 52 h 60 61 62 63 64 j 70 71t74 l m n o p f"

foreach sec of local sctr_range {
	local is_service : list sec in Srest_candidates
	if `is_service' {
		local List_Srest `List_Srest' `sec'
	}
	else {
		local Li_Sectors `Li_Sectors' `sec'
	}
}
di "`List_Srest'"
di "`Li_Sectors'"

* Share of missing changes per country-sector (not carried past the collapse).
bysort wiod_name wiod_code: egen mean_mi = mean(mi(chng_ppi))

* Service sectors are relabelled "Srest" (s = 999) and then removed:
* this section works with the non-service sectors only.
foreach svc of local Srest_candidates {
	replace wiod_code = "Srest" if wiod_code == "`svc'"
}
replace s = 999 if wiod_code == "Srest"
drop if wiod_code == "Srest"

collapse (mean) chng_* , by(year month c s wiod_code wiod_name date)
*tabulate wiod_n wiod_c if chng_ppi != . & year <= 2011 & year >= $YEAR_START

* --- Countries: share of non-missing changes per country ---
gen tmp_tot_dummy = 1
gen tmp_nm_dummy = (chng_ppi != .)
bysort wiod_name: egen tot_p_ctry = total(tmp_tot_dummy)
bysort wiod_name: egen nm_p_ctry = total(tmp_nm_dummy)
gen nm_share = nm_p_ctry/tot_p_ctry
drop if wiod_name == "ROW"

* A country is folded into ROW when fewer than 40% of its changes are observed.
* Turkey, India and Brazil are forced into ROW regardless of coverage
* (original note: they should not be included at this late stage).
gen byte to_row = nm_share < 0.4 | wiod_name == "TUR" | wiod_name == "IND" | wiod_name == "BRA"

levelsof wiod_name, local(ctry_range) clean
levelsof wiod_name if to_row, local(ROW_candidates) clean
foreach ctry of local ctry_range {
	local goes_to_row : list ctry in ROW_candidates
	if `goes_to_row' {
		local List_ROW `List_ROW' `ctry'
	}
	else {
		local Li_Ctrys `Li_Ctrys' `ctry'
	}
}

di "`List_ROW'"
di "`Li_Ctrys'"

global test `Li_Ctrys'
replace c = 999 if to_row
replace wiod_name = "ROW" if to_row

* Average the changes of all countries that now share the ROW label.
collapse (mean) chng_* , by(year month date c s wiod_code wiod_name)
*tabulate wiod_n wiod_c if chng_ppi != . & year <= 2010 & year >= 1995

********************************************************************************
** Interpolation of missing PPI values 
********************************************************************************
* Panel declaration is lost in the collapse, so set it up again.
egen ID = group(c s)
xtset ID date, monthly

* Flag the observations that are missing before imputation, and record the
* share of missing sectors per country and month.
gen _extr_chng_ppi = (chng_ppi == .)
bysort wiod_name date: egen ctry_miss_ppi = mean(_extr_chng_ppi)

levelsof wiod_name, local(ctry_range) clean
levelsof wiod_code, local(sctr_range) clean
local startyear = $YEAR_START
local endyear = $YEAR_END


** Imputation
* Every missing change inside [startyear, endyear] is replaced by the fitted
* value of a regression on calendar-month dummies:
*   - estimated on the country-sector cell itself when it has any data,
*   - otherwise estimated on all sectors of the country (xtreg).
foreach nn of local ctry_range {
	foreach cc of local sctr_range {
		local cell wiod_name == "`nn'" & wiod_code == "`cc'" & year <= `endyear' & year >= `startyear'

		qui sum chng_ppi if `cell'
		if r(N) > 0 {
			qui regress chng_ppi i.month if `cell'
		}
		else {
			di "exception for empty bucket triggered in: ppi `nn' `cc'"
			qui xtreg chng_ppi i.month if wiod_name == "`nn'" & year <= `endyear' & year >= `startyear'
		}

		* Fitted values are only written into the originally missing cells.
		predict tmp_pred, xb
		qui replace chng_ppi = tmp_pred  if _extr_chng_ppi == 1 & `cell'
		drop tmp_pred
	}
}

* Report: share of imputed cells and share of cells that are now non-missing.
qui count if _extr_chng_ppi == 1 & year <= `endyear' & year >= `startyear'
local tot_extr = r(N)
qui count if year <= `endyear' & year >= `startyear'
local tot = r(N)
local sh_etrap = `tot_extr'/`tot'

qui count if chng_ppi != . & year <= `endyear' & year >= `startyear'
local tot_stillnm = r(N)
local sh_nonmiss = `tot_stillnm'/`tot'
di " "
di " "
di "-----------------Extrapolation Report--------------------"
di "The data set is only extraplolated in the period from `startyear' to `endyear'"
di " "
di "Share of interpoalted observations in variable chng_ppi is: `sh_etrap'"
di " "
di "Share of non-missing observations in variable ppi after extrapolation: `sh_nonmiss'"

********************************************************************************

* Price changes are assigned to the partner country: partner / partner_ind
* are copies of the country and sector identifiers.
gen partner = wiod_name
gen partner_ind = wiod_code
rename wiod_code country_ind
rename wiod_name country

order date partner partner_ind
sort date partner partner_ind
format chng_ppi %12.0g

* Full panel as csv (still including c, s and ID) and as .dta (without them)
outsheet using "$outputfolder\PPI_MATLAB_monthly_trunc.csv", comma replace
drop c s ID
save "$outputfolder\NM_Price_changes_monthly_trunc.dta", replace

********************************************************************************
* Export to csv as a vector to be used in Matlab
********************************************************************************
* One csv per year-month. The variable selection is done once and each file is
* written through an if-qualifier, so a single preserve/restore pair suffices.
use "$outputfolder\NM_Price_changes_monthly_trunc.dta", clear
preserve
keep date year month country country_ind chng_ppi
forvalues yr = $YEAR_START/$YEAR_END {
	forvalues mo = 1/12 {
		outsheet using "$matlabfolder\PPI_`yr'`mo'.csv" if year == `yr' & month == `mo', comma replace
	}
}
restore

********************************************************************************
* IV. 17M sectors 31 countries
* Prepare PPI in USD: collapse to final sector list & interpolate
********************************************************************************
* USD version of the sector/country classification: log changes of the
* USD-denominated PPI, service sectors removed, a fixed list of countries
* folded into ROW.
use "$outputfolder\PIusd_ordered_monthly.dta", clear

egen ID = group(c s)
gen date = ym(year,month)
format date %tg
xtset ID date, m

	
* Create log-changes
gen chng_ppi = ln(ppi) - ln(l1.ppi)	

levelsof wiod_code, local(sctr_range) c

local Srest_candidates "50 51 52 h 60 61 62 63 64 j 70 71t74 l m n o p f"
	
* Share of missing changes per country-sector (not carried past the collapse)
gen missingppi = mi(chng_ppi)
bys wiod_name wiod_code: egen mean_mi = mean(missingppi)
drop missingppi

foreach ss in `Srest_candidates' {
	replace wiod_code = "Srest" if wiod_code == "`ss'"
}


replace s = 999 if wiod_code == "Srest"

* The bookkeeping lists are appended to below. They are emptied first because
* the same local names are filled by an earlier section of this file and would
* otherwise still contain that section's entries.
local List_Srest ""
local Li_Sectors ""
foreach b of local sctr_range {
	 if `: list b in Srest_candidates' {
		local List_Srest `List_Srest' `b'
	 }
	else {
		local Li_Sectors `Li_Sectors' `b'
	}
 }
di "`List_Srest'"
di "`Li_Sectors'"
drop if wiod_code == "Srest"

collapse (mean) chng_* , by(year month c s wiod_code wiod_name date)
*tabulate wiod_n wiod_c if chng_ppi != . & year <= 2011 & year >= $YEAR_START

* Coverage per country. nm_share is informational in this section: ROW
* membership is taken from the fixed list below, not from a coverage threshold.
gen tmp_tot_dummy = 1
bysort wiod_name: egen tot_p_ctry = total(tmp_tot_dummy)
gen tmp_nm_dummy = 0
replace tmp_nm_dummy = 1 if chng_ppi != .
bysort wiod_name: egen nm_p_ctry = total(tmp_nm_dummy)
gen nm_share = nm_p_ctry/tot_p_ctry
drop if wiod_name == "ROW"

levelsof wiod_name, local(ctry_range) c

* Countries folded into ROW (single definition, used for both the lists and
* the recode below)
local ROW_candidates "BRA CYP EST IDN IND LUX LVA MLT SVK TUR" 

* Reset for the same reason as the sector lists above; without it $test would
* also contain the countries collected by the earlier section.
local List_ROW ""
local Li_Ctrys ""
	foreach b of local ctry_range { 
	 if `: list b in ROW_candidates' { 
		local List_ROW `List_ROW' `b'
	 } 
	else {
		local Li_Ctrys `Li_Ctrys' `b'
	}	
 } 
	 
di "`List_ROW'"
di "`Li_Ctrys'"

global test `Li_Ctrys'

* Recode the ROW members: c = 999 and a common country label.
foreach nn of local ROW_candidates {
	replace c = 999 if wiod_name == "`nn'"
	replace wiod_name = "ROW" if wiod_name == "`nn'"
}

* Average the changes of all countries that now share the ROW label.
collapse (mean) chng_* , by(year month date c s wiod_code wiod_name)
*tabulate wiod_n wiod_c if chng_ppi != . & year <= 2010 & year >= 1995

********************************************************************************
** Interpolation of missing PPI values 
********************************************************************************
* Panel declaration is lost in the collapse, so set it up again.
egen ID = group(c s)
xtset ID date, monthly

* Flag the observations that are missing before imputation, and record the
* share of missing sectors per country and month.
gen _extr_chng_ppi = (chng_ppi == .)
bysort wiod_name date: egen ctry_miss_ppi = mean(_extr_chng_ppi)

levelsof wiod_name, local(ctry_range) clean
levelsof wiod_code, local(sctr_range) clean
local startyear = $YEAR_START
local endyear = $YEAR_END

**  Imputation
* Every missing change inside [startyear, endyear] is replaced by the fitted
* value of a regression on calendar-month dummies:
*   - estimated on the country-sector cell itself when it has any data,
*   - otherwise estimated on all sectors of the country (xtreg).
foreach nn of local ctry_range {
	foreach cc of local sctr_range {
		local cell wiod_name == "`nn'" & wiod_code == "`cc'" & year <= `endyear' & year >= `startyear'

		qui sum chng_ppi if `cell'
		if r(N) > 0 {
			qui regress chng_ppi i.month if `cell'
		}
		else {
			di "exception for empty bucket triggered in: ppi `nn' `cc'"
			qui xtreg chng_ppi i.month if wiod_name == "`nn'" & year <= `endyear' & year >= `startyear'
		}

		* Fitted values are only written into the originally missing cells.
		predict tmp_pred, xb
		qui replace chng_ppi = tmp_pred  if _extr_chng_ppi == 1 & `cell'
		drop tmp_pred
	}
}

* Report: share of imputed cells and share of cells that are now non-missing.
qui count if _extr_chng_ppi == 1 & year <= `endyear' & year >= `startyear'
local tot_extr = r(N)
qui count if year <= `endyear' & year >= `startyear'
local tot = r(N)
local sh_etrap = `tot_extr'/`tot'

qui count if chng_ppi != . & year <= `endyear' & year >= `startyear'
local tot_stillnm = r(N)
local sh_nonmiss = `tot_stillnm'/`tot'
di " "
di " "
di "-----------------Extrapolation Report--------------------"
di "the data set is only extraplolated in the period from `startyear' to `endyear'"
di " "
di "Share of interpoalted observations in variable chng_ppi is: `sh_etrap'"
di " "
di "Share of non-missing observations in variable ppi after extrapolation: `sh_nonmiss'"

********************************************************************************


* Price changes are assigned to the partner country: partner / partner_ind
* are copies of the country and sector identifiers.
gen partner = wiod_name
gen partner_ind = wiod_code
rename wiod_code country_ind
rename wiod_name country

order date partner partner_ind
sort date partner partner_ind
format chng_ppi %12.0g

* Full panel as csv (still including c, s and ID) and as .dta (without them)
outsheet using "$outputfolder\PPI_inUSD_MATLAB_monthly_trunc.csv", comma replace
drop c s ID
save "$outputfolder\NM_Price_changes_inUSD_monthly_trunc.dta", replace

********************************************************************************
* Export to csv as a vector to be used in Matlab
********************************************************************************
* One csv per year-month. The variable selection is done once and each file is
* written through an if-qualifier, so a single preserve/restore pair suffices.
use "$outputfolder\NM_Price_changes_inUSD_monthly_trunc.dta", clear
preserve
keep date year month country country_ind chng_ppi
forvalues yr = $YEAR_START/$YEAR_END {
	forvalues mo = 1/12 {
		outsheet using "$matlabfolder\PPI_inUSD_`yr'`mo'.csv" if year == `yr' & month == `mo', comma replace
	}
}
restore


********************************************************************************
* V. 17M + 1S sectors 31 countries
* Prepare for imputation & impute
********************************************************************************
* 17 manufacturing sectors + one service aggregate ("Srest"): PPI panel with
* the CPI services series merged in, service sectors pooled into Srest,
* low-coverage countries pooled into ROW.
use "$outputfolder\PI_ordered_monthly.dta", clear
merge 1:1 year month c s wiod_code wiod_name using "$outputfolder\CPI_ordered_monthly.dta"
drop _merge

egen ID = group(c s)
gen date = ym(year,month)
format date %tg
xtset ID date, m
	
* Create log-changes 
gen chng_ppi = ln(ppi) - ln(l1.ppi)	
gen chng_cpi_serv = ln(cpi_serv) - ln(l1.cpi_serv)

* Observations after the sample window are not used in this section
drop if year> $YEAR_END

levelsof wiod_code, local(sctr_range) c

local Srest_candidates "50 51 52 h 60 61 62 63 64 j 70 71t74 l m n o p f"
	
* Share of missing changes per country-sector (not carried past the collapse)
gen missingppi = mi(chng_ppi)
bys wiod_name wiod_code: egen mean_mi = mean(missingppi)
drop missingppi

foreach ss in `Srest_candidates' {
	replace wiod_code = "Srest" if wiod_code == "`ss'"
}


* Unlike the manufacturing-only sections, the Srest rows are kept here.
replace s = 999 if wiod_code == "Srest"

* The bookkeeping lists are appended to below. They are emptied first because
* the same local names are filled by earlier sections of this file and would
* otherwise still contain those sections' entries.
local List_Srest ""
local Li_Sectors ""
foreach b of local sctr_range {
	 if `: list b in Srest_candidates' {
		local List_Srest `List_Srest' `b'
	 }
	else {
		local Li_Sectors `Li_Sectors' `b'
	}
 }
di "`List_Srest'"
di "`Li_Sectors'"

* One Srest row per country and month: mean over the pooled service sectors
collapse (mean) chng_* , by(year month c s wiod_code wiod_name date)
*tabulate wiod_n wiod_c if chng_ppi != . & year <= 2011 & year >= $YEAR_START

* Share of non-missing PPI changes per country
gen tmp_tot_dummy = 1
bysort wiod_name: egen tot_p_ctry = total(tmp_tot_dummy)
gen tmp_nm_dummy = 0
replace tmp_nm_dummy = 1 if chng_ppi != .
bysort wiod_name: egen nm_p_ctry = total(tmp_nm_dummy)
gen nm_share = nm_p_ctry/tot_p_ctry
drop if wiod_name == "ROW"

levelsof wiod_name, local(ctry_range) c
levelsof wiod_name if (nm_share < 0.4 | wiod_name == "TUR" | wiod_name == "IND" | wiod_name=="BRA"), local(ROW_candidates) c //note that we don't want to include turkey, india and brazil at this late stage so we force it in the ROW)

* Reset for the same reason as the sector lists above; without it $test would
* also contain the countries collected by earlier sections.
local List_ROW ""
local Li_Ctrys ""
	foreach b of local ctry_range { 
	 if `: list b in ROW_candidates' { 
		local List_ROW `List_ROW' `b'
	 } 
	else {
		local Li_Ctrys `Li_Ctrys' `b'
	}	
 } 
	 
di "`List_ROW'"
di "`Li_Ctrys'"

global test `Li_Ctrys'
replace c = 999 if (nm_share < 0.4 | wiod_name == "TUR" | wiod_name == "IND"| wiod_name=="BRA")
replace wiod_name = "ROW" if (nm_share < 0.4 | wiod_name == "TUR" | wiod_name == "IND"| wiod_name=="BRA")

* Average the changes of all countries that now share the ROW label.
collapse (mean) chng_* , by(year month date c s wiod_code wiod_name)
*tabulate wiod_n wiod_c if chng_ppi != . & year <= 2010 & year >= 1995

* chng_ppicpi: PPI change for the goods sectors, CPI services change for Srest
gen chng_ppicpi = chng_ppi
replace chng_ppicpi = chng_cpi_serv if wiod_code == "Srest"
drop chng_cpi_serv
* Drop BGR for before 1998: outlier
replace  chng_ppicpi = . if wiod_code == "Srest" & wiod_name == "BGR" & date < ym(1998,1)


********************************************************************************
** Interpolation of missing PPI values
********************************************************************************
* Panel declaration is lost in the collapse, so set it up again.
egen ID = group(c s)
xtset ID date, monthly

* Pre-imputation missing flags for both series, plus the share of missing
* sectors per country and month.
gen _extr_chng_ppi = (chng_ppi == .)
bysort wiod_name date: egen ctry_miss_ppi = mean(_extr_chng_ppi)

gen _extr_chng_ppicpi = (chng_ppicpi == .)
bysort wiod_name date: egen ctry_miss_ppicpi = mean(_extr_chng_ppicpi)

* These four locals are also used by the PPI + CPI imputation further down.
levelsof wiod_name, local(ctry_range) clean
levelsof wiod_code, local(sctr_range) clean
local startyear = $YEAR_START
local endyear = $YEAR_END

*Imputation
** only PPI
* Every missing change inside [startyear, endyear] is replaced by the fitted
* value of a regression on calendar-month dummies:
*   - estimated on the country-sector cell itself when it has any data,
*   - otherwise estimated on all sectors of the country (xtreg).
foreach nn of local ctry_range {
	foreach cc of local sctr_range {
		local cell wiod_name == "`nn'" & wiod_code == "`cc'" & year <= `endyear' & year >= `startyear'

		qui sum chng_ppi if `cell'
		if r(N) > 0 {
			qui regress chng_ppi i.month if `cell'
		}
		else {
			di "exception for empty bucket triggered in: ppi `nn' `cc'"
			qui xtreg chng_ppi i.month if wiod_name == "`nn'" & year <= `endyear' & year >= `startyear'
		}

		* Fitted values are only written into the originally missing cells.
		predict tmp_pred, xb
		qui replace chng_ppi = tmp_pred  if _extr_chng_ppi == 1 & `cell'
		drop tmp_pred
	}
}

* Report: share of imputed cells and share of cells that are now non-missing.
qui count if _extr_chng_ppi == 1 & year <= `endyear' & year >= `startyear'
local tot_extr = r(N)
qui count if year <= `endyear' & year >= `startyear'
local tot = r(N)
local sh_etrap = `tot_extr'/`tot'

qui count if chng_ppi != . & year <= `endyear' & year >= `startyear'
local tot_stillnm = r(N)
local sh_nonmiss = `tot_stillnm'/`tot'
di " "
di " "
di "-----------------Extrapolation Report--------------------"
di "the data set is only extraplolated in the period from `startyear' to `endyear'"
di " "
di "Share of interpoalted observations in variable chng_ppi is: `sh_etrap'"
di " "
di "Share of non-missing observations in variable ppi after extrapolation: `sh_nonmiss'"

* Cells still missing after imputation (all years)
count if mi(chng_ppi)

** PPI + CPI
* Same imputation as for chng_ppi, applied to chng_ppicpi (PPI change for the
* goods sectors, CPI services change for Srest): missing values inside
* [startyear, endyear] get the fitted value of a regression on calendar-month
* dummies, estimated on the cell itself or, if the cell is empty, on all
* sectors of the country.
* Uses ctry_range, sctr_range, startyear, endyear and _extr_chng_ppicpi as
* defined before the PPI-only imputation.
foreach nn in `ctry_range' {
	foreach cc in `sctr_range' {
	*di "`nn' `cc'"
		qui sum chng_ppicpi if wiod_n == "`nn'" & wiod_c == "`cc'" & year <= `endyear' & year >= `startyear'
		if r(N) > 0 {
			qui regress chng_ppicpi i.month if wiod_n == "`nn'" & wiod_c == "`cc'" & year <= `endyear' & year >= `startyear'
			predict tmp_pred, xb
			qui replace chng_ppicpi = tmp_pred  if _extr_chng_ppicpi == 1 & wiod_n == "`nn'" & wiod_c == "`cc'" & year <= `endyear' & year >= `startyear'
			drop tmp_pred
		}
		else {
			* Message names the series actually being imputed, so that it can
			* be told apart in the log from the PPI-only pass.
			di "exception for empty bucket triggered in: ppicpi `nn' `cc'"
			qui xtreg chng_ppicpi i.month if wiod_n == "`nn'" & year <= `endyear' & year >= `startyear'
			predict tmp_pred, xb
			qui replace chng_ppicpi = tmp_pred  if _extr_chng_ppicpi == 1 & wiod_n == "`nn'" & wiod_c == "`cc'" & year <= `endyear' & year >= `startyear'
			drop tmp_pred

		}
	}
}

* Report: share of imputed cells and share of cells that are now non-missing.
qui sum _extr_chng_ppicpi if _extr_chng_ppicpi == 1 & year <= `endyear' & year >= `startyear'
local tot_extr = r(N)
qui sum _extr_chng_ppicpi if year <= `endyear' & year >= `startyear'
local tot = r(N)
local sh_etrap = `tot_extr'/`tot'

qui sum _extr_chng_ppicpi if chng_ppicpi != . & year <= `endyear' & year >= `startyear'
local tot_stillnm = r(N)
local sh_nonmiss = `tot_stillnm'/`tot'
di " "
di " "
di "-----------------Extrapolation Report--------------------"
di "the data set is only extraplolated in the period from `startyear' to `endyear'"
di " "
di "Share of interpoalted observations in variable chng_ppicpi is: `sh_etrap'"
di " "
di "Share of non-missing observations in variable chng_ppicpi after extrapolation: `sh_nonmiss'"

* Cells still missing after imputation (all years)
count if mi(chng_ppicpi)


save "$outputfolder\temp_ppi_31cty18sec.dta", replace

********************************************************************************
* Generate aggregated PPI change(Dppi) using 2002 total output weights
********************************************************************************
* 2002 total output by country and sector, turned into within-country shares.
use year country country_ind total_output if year==2002 using "$wiodfolder\WIOD_ordered.dta",clear
* Keep only one observation per wiod_name wiod_code
bysort country country_ind: keep if _n == 1
	
* Collapse ROW
* NOTE(review): this membership list is hard-coded, whereas the price data of
* this section assign countries to ROW by coverage share (plus TUR/IND/BRA) -
* confirm that both definitions give the same set. "RO" is the only two-letter
* entry in the list; verify the intended country code.
local List_ROW "BRA CYP EST IDN IND LUX LVA MLT RO SVK TUR"
foreach cc of local List_ROW {
	replace country = "ROW" if country=="`cc'"
}
		
* collapse without a statistic takes the mean, so ROW output is the average
* over its member countries.
* NOTE(review): shares are unaffected by mean vs. sum only if every member
* reports every sector - confirm.
collapse (mean) total_output, by(year country country_ind)

* Drop Srest sectors
local Srest_candidates "50 51 52 h 60 61 62 63 64 j 70 71t74 l m n o p f"
foreach sect of local Srest_candidates {
	drop if country_ind == "`sect'"
}

rename country wiod_name
rename country_ind wiod_code
drop year

* Compute output shares: each sector's output over the country total of the
* remaining (non-service) sectors
bysort wiod_name: egen tot = total(total_output)
gen share_output = total_output / tot
drop total_output tot
save "$wiodfolder\temp_wiod2002.dta", replace

********************************************************************************
* Merge pre-imputation-PPI with weights 
********************************************************************************
* Attach the 2002 output shares to the imputed price changes and build an
* output-weighted aggregate PPI change per country and month.
use "$outputfolder\temp_ppi_31cty18sec.dta",clear
merge m:1 wiod_name wiod_code using "$wiodfolder\temp_wiod2002.dta"

* Every sector except Srest must have found a weight. The explanatory message
* is printed BEFORE aborting; error 9 ("assertion is false") is the return
* code a failed -assert- produces, so the script still stops with the same code.
count if _merge==1 & wiod_code !="Srest"
if r(N) != 0 {
	display as error "PROBLEM when merging price data with 2002 output to construct aggregate"
	error 9
}

* Weights without price data are not needed
drop if _merge == 2
drop _merge

* Construct the aggregate ppi & save
* Srest rows carry no weight, so their product is missing, and total() counts
* missing products as zero.
* NOTE(review): the same holds for any cell whose chng_ppi is still missing,
* which lowers the aggregate in that month - confirm this is acceptable.
gen dppiTshare = chng_ppi * share_output
bys year month wiod_name: egen Dppi_agg = total(dppiTshare)
preserve
rename wiod_name country
rename wiod_code country_ind
collapse (firstnm) Dppi_agg, by(year month country)
save "$outputfolder\constructed_aggregated_ppi_31cty18sec.dta", replace
restore

********************************************************************************
* V.a. Replace the Srest with the constructed aggregate & save
********************************************************************************
* Variant a: the Srest price change is the constructed output-weighted aggregate.
preserve
replace chng_ppi = Dppi_agg if wiod_code == "Srest"
format chng_ppi %12.0g

sort date wiod_name wiod_code
order date wiod_name wiod_code
rename wiod_name country
rename wiod_code country_ind
drop Dppi_agg share_output dppiTshare

* csv still contains c, s and ID; the .dta does not
outsheet using "$outputfolder\PPI_MATLAB_monthly_trunc_31cty18sec_srest_agg.csv", comma replace
drop c s ID
save "$outputfolder\NM_Price_changes_monthly_trunc_31cty18sec_srest_agg.dta", replace
restore

********************************************************************************
* V.b. Replace the Srest with the CPI services & save
********************************************************************************
* Variant b: chng_ppicpi (PPI for goods sectors, CPI services for Srest) takes
* over the name chng_ppi.
preserve
*replace chng_ppi = chng_cpi_serv if wiod_code == "Srest"
*replace chng_ppi = Dppi_agg if wiod_code == "Srest" & mi(chng_cpi_serv)
drop Dppi_agg share_output dppiTshare chng_ppi
rename chng_ppicpi chng_ppi
format chng_ppi %12.0g

sort date wiod_name wiod_code
order date wiod_name wiod_code
rename wiod_name country
rename wiod_code country_ind

* csv still contains c, s and ID; the .dta does not
outsheet using "$outputfolder\PPI_MATLAB_monthly_trunc_31cty18sec_srest_cpi.csv", comma replace
drop c s ID
save "$outputfolder\NM_Price_changes_monthly_trunc_31cty18sec_srest_cpi.dta", replace
restore


********************************************************************************
* Export both to excel:
********************************************************************************
* Export to csv as a vector to be used in Matlab - srest_agg
* One csv per year-month for each of the two Srest variants. The variable
* selection and column order are fixed once; each file is then written through
* an if-qualifier, so a single preserve/restore pair per variant suffices.

* srest_agg
use "$outputfolder\NM_Price_changes_monthly_trunc_31cty18sec_srest_agg.dta", clear
preserve
keep date year month country country_ind chng_ppi
order date year month country country_ind chng_ppi
forvalues yr = $YEAR_START/$YEAR_END {
	forvalues mo = 1/12 {
		outsheet using "$matlabfolder\PPI_`yr'`mo'_31cty18sec_srest_agg.csv" if year == `yr' & month == `mo', comma replace
	}
}
restore


* srest_cpi
use "$outputfolder\NM_Price_changes_monthly_trunc_31cty18sec_srest_cpi.dta", clear
preserve
keep date year month country country_ind chng_ppi
order date year month country country_ind chng_ppi
forvalues yr = $YEAR_START/$YEAR_END {
	forvalues mo = 1/12 {
		outsheet using "$matlabfolder\PPI_`yr'`mo'_31cty18sec_srest_cpi.csv" if year == `yr' & month == `mo', comma replace
	}
}
restore

********************************************************************************
* VI. 35 sectors 31 countries
* Prepare PPI: collapse to final sector list & interpolate
********************************************************************************
* 35-sector version: service sectors stay in the data as separate rows, but
* their price change is set to missing and they are flagged with d_service.
use "$outputfolder\PI_ordered_monthly.dta", clear

egen ID = group(c s)
gen date = ym(year,month)
format date %tg
xtset ID date, m

	
** Create log-changes
gen chng_ppi = ln(ppi) - ln(l1.ppi)	

levelsof wiod_code, local(sctr_range) c
local Srest_candidates "50 51 52 h 60 61 62 63 64 j 70 71t74 l m n o p f"
gen d_service = 0
foreach ss in `Srest_candidates' {
	replace chng_ppi = . if wiod_code == "`ss'"
	replace d_service = 1 if wiod_code == "`ss'" // dummy 1 if it's a service sector
}

* The bookkeeping lists are appended to below. They are emptied first because
* the same local names are filled by earlier sections of this file and would
* otherwise still contain those sections' entries.
local List_Srest ""
local Li_Sectors ""
foreach b of local sctr_range {
	 if `: list b in Srest_candidates' {
		local List_Srest `List_Srest' `b'
	 }
	else {
		local Li_Sectors `Li_Sectors' `b'
	}
 }
di "`List_Srest'"
di "`Li_Sectors'" 


* Generate an indicator showing the number of observations in nonservice sectors by country
gen tmp_tot_dummy = 1 if d_service == 0
bysort wiod_name: egen tot_p_ctry = total(tmp_tot_dummy) 
* Generate an indicator showing the number of non missing observations in nonservice sectors by country
gen tmp_nm_dummy = 0 if d_service == 0
replace tmp_nm_dummy = 1 if chng_ppi != .& d_service == 0
bysort wiod_name: egen nm_p_ctry = total(tmp_nm_dummy) 
* nm_share is informational in this section: ROW membership comes from the
* fixed list below, not from a coverage threshold.
gen nm_share = nm_p_ctry/tot_p_ctry
drop if wiod_name == "ROW"

levelsof wiod_name, local(ctry_range) c
*levelsof wiod_name if (nm_share < 0.4 | wiod_name == "TUR" | wiod_name == "IND" | wiod_name=="BRA"), local(ROW_candidates) c //note that we don't want to include turkey, india and brazil at this late stage so we force it in the ROW)
local ROW_candidates "BRA CYP EST IDN IND LUX LVA MLT SVK TUR"

* Reset for the same reason as the sector lists above. List_ROW in particular
* is assigned a fixed list earlier in this file (the 2002 output weights), and
* $test would otherwise also contain countries collected by earlier sections.
local List_ROW ""
local Li_Ctrys ""
foreach b of local ctry_range { 
	 if `: list b in ROW_candidates' { 
		local List_ROW `List_ROW' `b'
	 } 
	else {
		local Li_Ctrys `Li_Ctrys' `b'
	}	
 } 
	 
di "`List_ROW'"
di "`Li_Ctrys'"
global test `Li_Ctrys'

* Recode the ROW members: c = 999 and a common country label.
foreach nn in `ROW_candidates' {
	replace c = 999 if wiod_name == "`nn'"
	replace wiod_name = "ROW" if wiod_name == "`nn'"
}


* Average the changes of all countries that now share the ROW label;
* d_service is constant within a sector, so its mean stays 0 or 1.
collapse (mean) chng_ppi d_service, by(year month date c s wiod_code wiod_name)
*tabulate wiod_n wiod_c if chng_ppi != . & year <= 2010 & year >= 1995

********************************************************************************
** Interpolation of missing PPI values 
********************************************************************************
* Panel declaration is lost in the collapse, so set it up again.
egen ID = group(c s)
xtset ID date, monthly

* Flag the observations that are missing before imputation (this includes all
* service-sector rows), and record the share of missing sectors per country
* and month.
gen _extr_chng_ppi = (chng_ppi == .)
bysort wiod_name date: egen ctry_miss_ppi = mean(_extr_chng_ppi)

levelsof wiod_name, local(ctry_range) clean
levelsof wiod_code, local(sctr_range) clean
local startyear = $YEAR_START
local endyear = $YEAR_END

* Imputation
* Non-service sectors only: every missing change inside [startyear, endyear]
* is replaced by the fitted value of a regression on calendar-month dummies,
* estimated on the country-sector cell itself when it has any data and on all
* sectors of the country (xtreg) otherwise.
foreach nn of local ctry_range {
	foreach cc of local sctr_range {
		* Service sectors are left missing on purpose
		quietly sum d_service if wiod_code == "`cc'"
		if r(mean) == 1 {
			continue
		}
		di "`nn' `cc'"

		local cell wiod_name == "`nn'" & wiod_code == "`cc'" & year <= `endyear' & year >= `startyear'

		qui sum chng_ppi if `cell'
		if r(N) > 0 {
			qui regress chng_ppi i.month if `cell'
		}
		else {
			di "exception for empty bucket triggered in: ppi `nn' `cc'"
			qui xtreg chng_ppi i.month if wiod_name == "`nn'" & year <= `endyear' & year >= `startyear'
		}

		* Fitted values are only written into the originally missing cells.
		predict tmp_pred, xb
		qui replace chng_ppi = tmp_pred  if _extr_chng_ppi == 1 & `cell'
		drop tmp_pred
	}
}

* Report: share of originally missing cells and share of cells that are now
* non-missing (service-sector rows are part of both denominators).
qui count if _extr_chng_ppi == 1 & year <= `endyear' & year >= `startyear'
local tot_extr = r(N)
qui count if year <= `endyear' & year >= `startyear'
local tot = r(N)
local sh_etrap = `tot_extr'/`tot'

qui count if chng_ppi != . & year <= `endyear' & year >= `startyear'
local tot_stillnm = r(N)
local sh_nonmiss = `tot_stillnm'/`tot'
di " "
di " "
di "-----------------Extrapolation Report--------------------"
di "the data set is only extraplolated in the period from `startyear' to `endyear'"
di " "
di "Share of interpoalted observations in variable chng_ppi is: `sh_etrap'"
di " "
di "Share of non-missing observations in variable ppi after extrapolation: `sh_nonmiss'"

********************************************************************************
* Remaining missing changes, separately for non-service and service sectors
count if mi(chng_ppi) & d_service == 0
count if mi(chng_ppi) & d_service == 1

* Price changes are assigned to the partner country: partner / partner_ind
* are copies of the country and sector identifiers.
gen partner = wiod_name
gen partner_ind = wiod_code
rename wiod_code country_ind
rename wiod_name country

order date partner partner_ind
sort date partner partner_ind
format chng_ppi %12.0g

* Full panel as csv (still including c, s and ID) and as .dta (without them)
outsheet using "$outputfolder\PPI_MATLAB_monthly_trunc_31cty35sec.csv", comma replace
drop c s ID
save "$outputfolder\NM_Price_changes_monthly_trunc_31cty35sec.dta", replace

********************************************************************************
* Export to csv as a vector to be used in Matlab
********************************************************************************
* One csv per year-month. The variable selection is done once and each file is
* written through an if-qualifier, so a single preserve/restore pair suffices.
use "$outputfolder\NM_Price_changes_monthly_trunc_31cty35sec.dta", clear
preserve
keep date year month country country_ind chng_ppi
forvalues yr = $YEAR_START/$YEAR_END {
	forvalues mo = 1/12 {
		outsheet using "$matlabfolder\PPI_`yr'`mo'_31cty35sec.csv" if year == `yr' & month == `mo', comma replace
	}
}
restore

